# This file contains the code to reproduce the analysis reported in sections 4 and 5, and appendices D and E of the article ``Environmental Policy and Directed Technological Change: Evidence from the European carbon market''. For convenience, the script is organised as far as possible to follow the order in which results are presented in the article.
# NB: Some variables have been removed from the data sets, preventing complete replication. Relevant lines of code have been left in for completeness, but have been commented out sp as not to cause problems for the user.

############### Preamble ###############
rm(list=ls())
require(coin)
require(data.table)
load("EUETS_innovation_public.RData")

# Define function to compute Wilcoxon's signed-rank p-values for Tobit-adjusted difference-in-differences treatment effects model
tobit.wilcox <- function(pre.trt, post.trt, pre.ctrl, post.ctrl, effect.min, effect.max, step = 0.01, distribution = c("exact", "asympt", "approx")) {
	p.values <- vector()
	for (i in seq(from = effect.min, to = effect.max, by = step)) { # Compute the p-value for each mu in the range [0,8].
		mu=i
		if (mu>=0) {
			trt <- pmax((post.trt - pre.trt) - mu, -pre.trt)
			ctrl <- (post.ctrl - pre.ctrl)
		}
		if (mu<0) {
			trt <- (post.trt - pre.trt)
			ctrl <- pmax((post.ctrl - pre.ctrl) + mu, -pre.ctrl)
		}
		p.values <- append(p.values, pvalue(wilcoxsign_test(trt ~ ctrl, zero.method = c("Pratt"), distribution = distribution)))
	}
	cbind(seq(from = effect.min, to = effect.max, by = step), p.values)
}


############### Replication code for section 4.1 ###############
# Figure 4: Comparison of matched EU ETS firms and non-EU ETS firms
quartz(width = 8.4, height = 3.3, "balanceplots", file = "balanceplots.pdf", type = "pdf")
par(mfrow=c(nrows=1,ncols=3), oma = c(0,0,0,0))
# qqplot(log(cs_w1$revenue0104), log(ts_w1$revenue0104), xlim=c(min(min(log(cs_w1$revenue0104)),min(log(ts_w1$revenue0104))),max(max(log(cs_w1$revenue0104)),max(log(ts_w1$revenue0104)))), ylim=c(min(min(log(cs_w1$revenue0104)),min(log(ts_w1$revenue0104))),max(max(log(cs_w1$revenue0104)),max(log(ts_w1$revenue0104)))), xlab = "Turnover of non-EU ETS firms (Mil. Euro)", ylab = "Turnover of EU ETS firms (Mil. Euro)", main = "(a)", xaxt = "n", yaxt = "n")
# axis(side = 1, at = c(log(1),log(100),log(10000),log(1000000),log(100000000)), labels = c("0","0.1","10","1'000","100'000"))
# axis(side = 2, at = c(log(1),log(100),log(10000),log(1000000),log(100000000)), labels = c("0","0.1","10","1'000","100'000"))
# abline(coef=c(0,1), col=2)
qqplot(log(cs_w1$patent_count+1), log(ts_w1$patent_count+1), xlim=c(0,max(max(log(cs_w1$patent_count+1)),max(log(ts_w1$patent_count+1)))), ylim=c(0,max(max(log(cs_w1$patent_count+1)),max(log(ts_w1$patent_count+1)))), xlab = "Patents by non-EU ETS firms", ylab = "Patents by EU ETS firms", main = "(b)", xaxt = "n", yaxt = "n") # To plot patent counts on a log scale, one is added to all observations first. Axes are adjusted appropriately below.
axis(side = 1, at = c(log(1),log(11),log(101),log(1001),log(10001)), labels = c("0","10","100","1'000","10'000"))
axis(side = 2, at = c(log(1),log(11),log(101),log(1001),log(10001)), labels = c("0","10","100","1'000","10'000"))
abline(coef=c(0,1), col=2)
qqplot(log(cs_w1$green_patent_count+1), log(ts_w1$green_patent_count+1), xlim=c(0,max(max(log(cs_w1$green_patent_count+1)),max(log(ts_w1$green_patent_count+1)))), ylim=c(0,max(max(log(cs_w1$green_patent_count+1)),max(log(ts_w1$green_patent_count+1)))), xlab = "Low-carbon patents by non-EU ETS firms", ylab = "Low-carbon patents by EU ETS firms", main = "(c)", xaxt = "n", yaxt = "n")  # To plot patent counts on a log scale, one is added to all observations first. Axes are adjusted appropriately below.
axis(side = 1, at = c(log(1),log(11),log(101)), labels = c("0","10","100"))
axis(side = 2, at = c(log(1),log(11),log(101)), labels = c("0","10","100"))
abline(coef=c(0,1), col=2)
dev.off()

# Table 2: Equivalence tests for matched EU ETS and non-EU ETS firms
# ## Turnover
# median(ts_w1$revenue0104 - cs_w1$revenue0104) # Median difference
#
# ### Equivalence range (±0.2 standard deviations of the pooled sample)
# 0.2*sd(append(ts_w1$revenue0104, cs_w1$revenue0104)
#
# ### Critical equivalence range (5% significance level)
# mu = 13248
# a <- pmax(ts_w1$revenue0104 - mu, 0)
# b <- pmax(cs_w1$revenue0104 - mu, 0)
# wilcoxsign_test(a ~ cs_w1$revenue0104, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w1$revenue0104 ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
# 
# ### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
# a <- pmax(ts_w1$revenue0104 - 0.2*sd(append(ts_w1$revenue0104, cs_w1$revenue0104)), 0)
# b <- pmax(cs_w1$revenue0104 - 0.2*sd(append(ts_w1$revenue0104, cs_w1$revenue0104)), 0)
# wilcoxsign_test(a ~ cs_w1$revenue0104, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w1$revenue0104 ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ### Equivalence test using t-test for difference in means (not reported)
# t.test(ts_w1$revenue0104, cs_w1$revenue0104, alternative = c("less"), mu = 0.2*sd(append(ts_w1$revenue0104, cs_w1$revenue0104)))
# t.test(ts_w1$revenue0104, cs_w1$revenue0104, alternative = c("greater"), mu = -0.2*sd(append(ts_w1$revenue0104, cs_w1$revenue0104)))
#
## Patent count
median(ts_w1$patent_count - cs_w1$patent_count) # Median difference

### Equivalence range (±0.2 standard deviations of the pooled sample)
0.2*sd(append(ts_w1$patent_count, cs_w1$patent_count))

### Critical equivalence range (5% significance level)
mu = 1.99 # We are able to reject both directional hypotheses for mu=2, but not for mu=1.99.
a <- pmax(ts_w1$patent_count - mu, 0)
b <- pmax(cs_w1$patent_count - mu, 0)
wilcoxsign_test(a ~ cs_w1$patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w1$patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
a <- pmax(ts_w1$patent_count - 0.2*sd(append(ts_w1$patent_count, cs_w1$patent_count)), 0)
b <- pmax(cs_w1$patent_count - 0.2*sd(append(ts_w1$patent_count, cs_w1$patent_count)), 0)
wilcoxsign_test(a ~ cs_w1$patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w1$patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

### Equivalence test using t-test for difference in means (not reported)
t.test(ts_w1$patent_count, cs_w1$patent_count, alternative = c("less"), mu = 0.2*sd(append(ts_w1$patent_count, cs_w1$patent_count)))
t.test(ts_w1$patent_count, cs_w1$patent_count, alternative = c("greater"), mu = -0.2*sd(append(ts_w1$patent_count, cs_w1$patent_count)))

## Low-carbon patent count
median(ts_w1$green_patent_count - cs_w1$green_patent_count) # Median difference

### Equivalence range (±0.2 standard deviations of the pooled sample)
0.2*sd(append(ts_w1$green_patent_count, cs_w1$green_patent_count))

### Critical equivalence range (5% significance level)
mu = 1.99
a <- pmax(ts_w1$green_patent_count - mu, 0)
b <- pmax(cs_w1$green_patent_count - mu, 0)
wilcoxsign_test(a ~ cs_w1$green_patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w1$green_patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
a <- pmax(ts_w1$green_patent_count - 0.2*sd(append(ts_w1$green_patent_count, cs_w1$green_patent_count)), 0)
b <- pmax(cs_w1$green_patent_count - 0.2*sd(append(ts_w1$green_patent_count, cs_w1$green_patent_count)), 0)
wilcoxsign_test(a ~ cs_w1$green_patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w1$green_patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

wilcoxsign_test(ts_w1$green_patent_count ~ cs_w1$green_patent_count, zero.method = c("Pratt"), distribution = c("exact")) # As noted in section 4.1, while we are unable to reject the hypothesis that the difference in low-carbon patenting between EU ETS and non-EU ETS firms lies in the standard equivalence range (see above), neither can we reject (at the 5% level) the hypothesis that the difference is zero.

### Equivalence test using t-test for difference in means (not reported)
t.test(ts_w1$green_patent_count, cs_w1$green_patent_count, alternative = c("less"), mu = 0.2*sd(append(ts_w1$green_patent_count, cs_w1$green_patent_count)))
t.test(ts_w1$green_patent_count, cs_w1$green_patent_count, alternative = c("greater"), mu = -0.2*sd(append(ts_w1$green_patent_count, cs_w1$green_patent_count)))

# ## Year of incorporation
# median(ts_w1$yearofincorporation - cs_w1$yearofincorporation, na.rm=T) # Median difference
#
# ### Equivalence range (±0.2 standard deviations of the pooled sample)
# 0.2*sd(append(ts_w1$yearofincorporation, cs_w1$yearofincorporation), na.rm=T)
#
# ### Critical equivalence range (5% significance level)
# mu = 0.49
# a <- pmax(ts_w1$yearofincorporation - mu, 0)
# b <- pmax(cs_w1$yearofincorporation - mu, 0)
# wilcoxsign_test(a ~ cs_w1$yearofincorporation, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w1$yearofincorporation ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
# a <- pmax(ts_w1$yearofincorporation - 0.2*sd(append(ts_w1$yearofincorporation, cs_w1$yearofincorporation), na.rm=T), 0)
# b <- pmax(cs_w1$yearofincorporation - 0.2*sd(append(ts_w1$yearofincorporation, cs_w1$yearofincorporation), na.rm=T), 0)
# wilcoxsign_test(a ~ cs_w1$yearofincorporation, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w1$yearofincorporation ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ### Equivalence test using t-test for difference in means (not reported)
# t.test(ts_w1$yearofincorporation, cs_w1$yearofincorporation, alternative = c("less"), mu = 0.2*sd(append(ts_w1$yearofincorporation, cs_w1$yearofincorporation), na.rm=T))
# t.test(ts_w1$yearofincorporation, cs_w1$yearofincorporation, alternative = c("greater"), mu = -0.2*sd(append(ts_w1$yearofincorporation, cs_w1$yearofincorporation), na.rm=T))


############### Replication code for section 4.2 ###############
# Table 3: Summary of results
## Estimate treatment effect for low-carbon patents
a <- ts_w1$green_patent_count # Low-carbon patents by EU ETS firms 2000-2004
b <- cs_w1$green_patent_count # Low-carbon patents by non-EU ETS firms 2000-2004
c <- ts_w1$epo_green_pat_epo0507 + ts_w1$epo_green_pat_epo0809 # Low-carbon patents by EU ETS firms 2005-2009
d <- cs_w1$epo_green_pat_epo0507 + cs_w1$epo_green_pat_epo0809 # Low-carbon patents by non-EU ETS firms 2005-2009
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=8, step = 0.01, distribution = "exact")
pe <- mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 2
lci <- min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 1
uci <- max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 5

# ### Plot p-values. The p-value at the point estimate is p=0.9106882.
# plot(p.values[,1], p.values[,2], type="l")
# abline(h=0.05)

### Calculate aggregate number of patents added for matched EU ETS firms
sum(c) - sum(pmax(c - pe, 0)) # Point estimate = 84
sum(c) - sum(pmax(c - lci, 0)) # Lower bound of CI = 53
sum(c) - sum(pmax(c - uci, 0)) # Upper bound of CI = 129

### Calculate aggregate change (in percent) for matched EU ETS firms
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (sum(pmax(c - pe, 0))) # Point estimate = 36.2069
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (sum(pmax(c - lci, 0))) # Lower bound of CI = 20.15209
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (sum(pmax(c - uci, 0))) # Upper bound of CI = 68.98396

### Calculate aggregate change (in percent) for EPO
lc_epo <- sum(descriptive$tot_epo_green_pat_Y02[descriptive$year>2004]) # 22309 low-carbon patents filed at the EPO in 2005-2009
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (lc_epo - (sum(c) - (sum(pmax(c - pe, 0)))))  # Point estimate = 0.3779528
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (lc_epo - (sum(c) - (sum(pmax(c - lci, 0)))))  # Lower bound of CI = 0.238138
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (lc_epo - (sum(c) - (sum(pmax(c - uci, 0)))))  # Upper bound of CI = 0.581605

## Extrapolate to all 5'568 EU ETS firms in our data set
### Calculate aggregate number of patents added for all EU ETS firms
sum(ETSpatent_counts$green_pat0509) - sum(pmax(ETSpatent_counts$green_pat0509 - pe, 0)) # Point estimate = 183
sum(ETSpatent_counts$green_pat0509) - sum(pmax(ETSpatent_counts$green_pat0509 - lci, 0)) # Lower bound of CI = 111
sum(ETSpatent_counts$green_pat0509) - sum(pmax(ETSpatent_counts$green_pat0509 - uci, 0)) # Upper bound of CI = 299

### Calculate aggregate change (in percent) for all EU ETS firms
100*(sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - pe, 0)))) / (sum(pmax(ETSpatent_counts$green_pat0509 - pe, 0))) # Point estimate = 9.122632
100*(sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - lci, 0)))) / (sum(pmax(ETSpatent_counts$green_pat0509 - lci, 0))) # Lower bound of CI = 5.341675
100*(sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - uci, 0)))) / (sum(pmax(ETSpatent_counts$green_pat0509 - uci, 0))) # Upper bound of CI = 15.82011

### Calculate aggregate change (in percent) for EPO
100*(sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - pe, 0)))) / (lc_epo - (sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - pe, 0)))))  # Point estimate = 0.8270813
100*(sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - lci, 0)))) / (lc_epo - (sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - lci, 0)))))  # Lower bound of CI = 0.500045
100*(sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - uci, 0)))) / (lc_epo - (sum(ETSpatent_counts$green_pat0509) - (sum(pmax(ETSpatent_counts$green_pat0509 - uci, 0))))) # Upper bound of CI = 1.358473

## Estimate treatment effect for other patents
a <- ts_w1$patent_count - ts_w1$green_patent_count # Other patents by EU ETS firms 2000-2004
b <- cs_w1$patent_count - cs_w1$green_patent_count # Other patents by non-EU ETS firms 2000-2004
c <- ts_w1$epo_total_pat0507 + ts_w1$epo_total_pat0809 - ts_w1$epo_green_pat_epo0507 - ts_w1$epo_green_pat_epo0809 # Other patents by EU ETS firms 2005-2009
d <- cs_w1$epo_total_pat0507 + cs_w1$epo_total_pat0809 - cs_w1$epo_green_pat_epo0507 - cs_w1$epo_green_pat_epo0809 # Other patents by non-EU ETS firms 2005-2009
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=5, step = 0.01, distribution = "exact")
pe <- mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 1
lci <- min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 1
uci <- max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 1.99

# ### Plot p-values. The p-value at the point estimate is p=0.2929262.
# plot(seq(from = 0, to = 5, by = .01), p.values, type="l")
# abline(h=0.05)

### Calculate aggregate number of patents added for matched EU ETS firms
sum(c) - sum(pmax(c - pe, 0)) # Point estimate = 305
sum(c) - sum(pmax(c - lci, 0)) # Lower bound of CI = 305
sum(c) - sum(pmax(c - uci, 0)) # Upper bound of CI = 512.9

### Calculate aggregate change (in percent) for matched EU ETS firms
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (sum(pmax(c - pe, 0))) # Point estimate = 1.886092
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (sum(pmax(c - lci, 0))) # Lower bound of CI = 1.886092
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (sum(pmax(c - uci, 0))) # Upper bound of CI = 3.213035

### Calculate aggregate change (in percent) for EPO
other_epo <- sum(descriptive$tot_epo_pat[descriptive$year>2004]) - sum(descriptive$tot_epo_green_pat_Y02[descriptive$year>2004])
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (other_epo - (sum(c) - (sum(pmax(c - pe, 0))))) # Point estimate = 0.04053027
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (other_epo - (sum(c) - (sum(pmax(c - lci, 0))))) # Lower bound of CI = 0.04053027
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (other_epo - (sum(c) - (sum(pmax(c - uci, 0))))) # Upper bound of CI = 0.06817613

## Extrapolate to all 5'568 EU ETS firms
### Calculate aggregate number of patents added for all EU ETS firms
ETSpatent_counts$other_patents0509 <- ETSpatent_counts$total_pat0509 - ETSpatent_counts$green_pat0509
sum(ETSpatent_counts$other_patents0509) - sum(pmax(ETSpatent_counts$other_patents0509 - pe, 0)) # Point estimate = 541
sum(ETSpatent_counts$other_patents0509) - sum(pmax(ETSpatent_counts$other_patents0509 - lci, 0)) # Lower bound of CI = 541
sum(ETSpatent_counts$other_patents0509) - sum(pmax(ETSpatent_counts$other_patents0509 - uci, 0)) # Upper bound of CI = 934.03

### Calculate aggregate change (in percent) for all EU ETS firms
100*(sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - pe, 0)))) / (sum(pmax(ETSpatent_counts$other_patents0509 - pe, 0))) # Point estimate = 0.8296274
100*(sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - lci, 0)))) / (sum(pmax(ETSpatent_counts$other_patents0509 - lci, 0))) # Lower bound of CI = 0.8296274
100*(sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - uci, 0)))) / (sum(pmax(ETSpatent_counts$other_patents0509 - uci, 0))) # Upper bound of CI = 1.441027

### Calculate aggregate change (in percent) for EPO
100*(sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - pe, 0)))) / (other_epo - (sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - pe, 0))))) # Point estimate = 0.07191395
100*(sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - lci, 0)))) / (other_epo - (sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - lci, 0))))) # Lower bound of CI = 0.07191395
100*(sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - uci, 0)))) / (other_epo - (sum(ETSpatent_counts$other_patents0509) - (sum(pmax(ETSpatent_counts$other_patents0509 - uci, 0))))) # Upper bound of CI = 0.1242235

## Naive estimates
sum(descriptive$greenpat_epo_ets[descriptive$year>2004]) - (sum(descriptive$greenpat_epo_nonets[descriptive$year>2004]) / sum(descriptive$greenpat_epo_nonets[descriptive$year>1999 & descriptive$year<2005]))*sum(descriptive$greenpat_epo_ets[descriptive$year>1999 & descriptive$year<2005]) # Additional low-carbon patents. The rounded number reported in the article is 585.2 (see 2_Descriptive_analysis.R for details).
100*585.2 / (sum(ETSpatent_counts$green_pat0509) - 585.2) # As percentage increase = 36.48834
# 100*(sum(descriptive$greenpat_epo_ets[descriptive$year>2004]) - (sum(descriptive$greenpat_epo_nonets[descriptive$year>2004]) / sum(descriptive$greenpat_epo_nonets[descriptive$year>1999 & descriptive$year<2005]))*sum(descriptive$greenpat_epo_ets[descriptive$year>1999 & descriptive$year<2005])) / (sum(ETSpatent_counts$green_pat0509) - (sum(descriptive$greenpat_epo_ets[descriptive$year>2004]) - (sum(descriptive$greenpat_epo_nonets[descriptive$year>2004]) / sum(descriptive$greenpat_epo_nonets[descriptive$year>1999 & descriptive$year<2005]))*sum(descriptive$greenpat_epo_ets[descriptive$year>1999 & descriptive$year<2005]))) # A more precise estimate = 36.62627
100*585.2 / (lc_epo - 585.2) # As percentage increase of EPO = 2.69382
# 100*(sum(descriptive$greenpat_epo_ets[descriptive$year>2004]) - (sum(descriptive$greenpat_epo_nonets[descriptive$year>2004]) / sum(descriptive$greenpat_epo_nonets[descriptive$year>1999 & descriptive$year<2005]))*sum(descriptive$greenpat_epo_ets[descriptive$year>1999 & descriptive$year<2005])) / (lc_epo - (sum(descriptive$greenpat_epo_ets[descriptive$year>2004]) - (sum(descriptive$greenpat_epo_nonets[descriptive$year>2004]) / sum(descriptive$greenpat_epo_nonets[descriptive$year>1999 & descriptive$year<2005]))*sum(descriptive$greenpat_epo_ets[descriptive$year>1999 & descriptive$year<2005]))) # A more precise estimate = 2.701474
sum(descriptive$total_patents_ets[descriptive$year>2004] - descriptive$greenpat_epo_ets[descriptive$year>2004]) - (sum(descriptive$total_patents_nonets[descriptive$year>2004] - descriptive$greenpat_epo_nonets[descriptive$year>2004]) / sum(descriptive$total_patents_nonets[descriptive$year>1999 & descriptive$year<2005] - descriptive$greenpat_epo_nonets[descriptive$year>1999 & descriptive$year<2005]))*sum(descriptive$total_patents_ets[descriptive$year>1999 & descriptive$year<2005] - descriptive$greenpat_epo_ets[descriptive$year>1999 & descriptive$year<2005]) # Additional other patents = 9072.803
100*9072.8 / (sum(ETSpatent_counts$total_pat0509) - sum(ETSpatent_counts$green_pat0509) - 9072.8) # As percentage increase = 16.00757
100*9072.8 / (other_epo - 9072.8) # As percentage increase of EPO = 1.219862


############### Replication code for section 4.3 ###############
# Figure 6: Comparison of matched EU ETS and non-EU ETS firms on 'unobserved' variable
# quartz(width = 4, height = 4.5, "balanceemployees", file = "balanceemployees.pdf", type = "pdf")
# qqplot(log(cs_w1$employees0104), log(ts_w1$employees0104), xlim=c(0,max(max(log(cs_w1$employees0104), na.rm=T),max(log(ts_w1$employees0104), na.rm=T))), ylim=c(0,max(max(log(cs_w1$employees0104), na.rm=T),max(log(ts_w1$employees0104), na.rm=T))), xlab = "Employees of non-EU ETS firms", ylab = "Employees of EU ETS firms", xaxt = "n", yaxt = "n")
# axis(side = 1, at = c(log(1), log(10),log(100),log(1000),log(10000),log(100000)), labels = c("1","","100","","10'000",""))
# axis(side = 2, at = c(log(1), log(10),log(100),log(1000),log(10000),log(100000)), labels = c("1","","100","","10'000",""))
# abline(coef=c(0,1), col=2)
# dev.off()

# Table 4: Equivalence test for matched EU ETS and non-EU ETS firms on 'unobserved' variable
# # Employees
# median(ts_w1$employees0104 - cs_w1$employees0104, na.rm=T) # Median difference
#
# ## Equivalence range (±0.2 standard deviations of the pooled sample)
# 0.2*sd(append(ts_w1$employees0104, cs_w1$employees0104), na.rm=T)
#
# ## Critical equivalence range (5% significance level)
# mu = 106.75
# a <- pmax(ts_w1$employees0104 - mu, 0)
# b <- pmax(cs_w1$employees0104 - mu, 0)
# wilcoxsign_test(a ~ cs_w1$employees0104, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w1$employees0104 ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ## Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
# a <- pmax(ts_w1$employees0104 - 0.2*sd(append(ts_w1$employees0104, cs_w1$employees0104), na.rm=T), 0)
# b <- pmax(cs_w1$employees0104 - 0.2*sd(append(ts_w1$employees0104, cs_w1$employees0104), na.rm=T), 0)
# wilcoxsign_test(a ~ cs_w1$employees0104, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w1$employees0104 ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ## Equivalence test using t-test for difference in means (not reported)
# t.test(ts_w1$employees0104, cs_w1$employees0104, alternative = c("less"), mu = 0.2*sd(append(ts_w1$employees0104, cs_w1$employees0104), na.rm=T))
# t.test(ts_w1$employees0104, cs_w1$employees0104, alternative = c("greater"), mu = -0.2*sd(append(ts_w1$employees0104, cs_w1$employees0104), na.rm=T))

# Re-estimate the treatment effect for low-carbon patents while excluding matched groups in which the EU ETS firm had the option of opting-in at least one of its installations.
# "(this reduces our sample size by nearly one hundred matched pairs)"
t_w1 <- ts_w1[ts_w1$optin==0,]
c_w1 <- cs_w1[cs_w1$optin==0,]
nrow(ts_w1) - nrow(t_w1) # The 97 excluded ETS firms (and their mached non-ETS firms) did not file any low-carbon patents in 2005-2009 (although one of the ETS firms filed a single low-carbon patent in 2000-2004). Hence, in practice, the freedom to opt in or out of the EU ETS, which these firms had, is effectively independent of innovation response.

# "This returns an estimate of 2 (1, 5.99) additional low-carbon patents, and of 1 (1, 1.99) other additional patents."
a <- t_w1$green_patent_count
b <- c_w1$green_patent_count
c <- t_w1$epo_green_pat_epo0507 + t_w1$epo_green_pat_epo0809
d <- c_w1$epo_green_pat_epo0507 + c_w1$epo_green_pat_epo0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=8, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 2
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 1
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 5.99

# Rosenbaum sensitivity bounds
## There is no treatment effect large enough to produce a 5% boost in low-carbon patenting among matched EU ETS firms (i.e. 1062 additional low-carbon patents)
sum(ts_w1$epo_green_pat_epo0507 + ts_w1$epo_green_pat_epo0809) - sum(pmax(ts_w1$epo_green_pat_epo0507 + ts_w1$epo_green_pat_epo0809 - 999999, 0))

## A treatment effect of 20.4 for the matched EU ETS firms will produce a 1% boost in low-carbon patenting among matched EU ETS firms (i.e. 223 additional low-carbon patents)
sum(ts_w1$epo_green_pat_epo0507 + ts_w1$epo_green_pat_epo0809) - sum(pmax(ts_w1$epo_green_pat_epo0507 + ts_w1$epo_green_pat_epo0809 - 20.4, 0))

## Calculate p-values for Rosenbaum's sensitivity parameter gamma (using simulation method with 10000 draws from the null distribution).
gamma <- 2.65 # 4 relevant threshold values of gamma were discovered through a computationally expensive search, gamma=c(2.65, 1.4, 1.8, 1.45) (see below for their respective interpretations). Run the code below for any one of these values to verify. For each threshold value, an incremental reduction implies that the point estimate (i.e. the value that maximises the p-value) is does not reach the target treatment effect (i.e. smaller than 20.4/larger than 0), or that the target treatment effect does not lie in the 95 percent CI, as appropriate.
a <- ts_w1$green_patent_count
b <- cs_w1$green_patent_count
c <- ts_w1$epo_green_pat_epo0507 + ts_w1$epo_green_pat_epo0809
d <- cs_w1$epo_green_pat_epo0507 + cs_w1$epo_green_pat_epo0809
p.values.plus <- vector()
p.values.minus <- vector()
reps <- 10000 # Computational burden can be reduced by drawing a smaller sample from the null distribution...
for (i in seq(from = -1, to = 25, by = .01)) { # ... and/or by calculating p-values only in the vicinity of the relevant threshold (i.e. 0 or 20.4). 
	mu=i
	if (mu>=0) {
		trt <- pmax((c - a) - mu, -a)
		ctrl <- (d - b)
	}
	if (mu<0) {
		trt <- (c - a)
		ctrl <- pmax((d - b) - mu, -b)
	}
	# Wilcoxon statistic
	diff <- trt - ctrl
	ranks <- rank(abs(diff), ties.method = c("average"))
	ranks <- ranks[diff!=0]
	diff <- diff[diff!=0]
	psi <- as.numeric(diff > 0)
	wilcoxon <- sum(psi * ranks)
	
	# Simulate null distributions
    p.plus <- gamma/(1 + gamma)
	psi.plus <- matrix(rbinom(length(diff)*reps, size = 1, prob=p.plus), ncol=length(diff), nrow=reps)
	psi.minus <- abs(psi.plus - 1)
	null.plus <- psi.plus %*% ranks
	null.minus <- psi.minus %*% ranks

	# P-values
	if (wilcoxon >= mean(null.plus)) {
		pval.plus <- 2 * length(subset(null.plus, null.plus >= wilcoxon))/length(null.plus)
	}
	if (wilcoxon <= mean(null.plus)) {
		pval.plus <- 2 * length(subset(null.plus, null.plus <= wilcoxon))/length(null.plus)
	}
	if (wilcoxon >= mean(null.minus)) {
		pval.minus <- 2 * length(subset(null.minus, null.minus >= wilcoxon))/length(null.minus)
	}
	if (wilcoxon <= mean(null.minus)) {
		pval.minus <- 2 * length(subset(null.minus, null.minus <= wilcoxon))/length(null.minus)
	}
	
	p.values.plus <- append(p.values.plus, pval.plus)
	p.values.minus <- append(p.values.minus, pval.minus)
}

## It is just possible to arrive at a point estimate of the treatment effect of 20.4 when gamma=2.65.
## We are just unable to reject the null hypothesis that the treatment effect is 20.4 when gamma=1.4.
plot(seq(from = -1, to = 25, by = .01), p.values.minus, type="l")
abline(h=0.05)

## It is just possible to arrive at a point estimate of the treatment effect of 0 when gamma=1.8.
## We are just unable to reject the null hypothesis that the treatment effect is 0 when gamma=1.45.
plot(seq(from = -1, to = 25, by = .01), p.values.plus, type="l")
abline(h=0.05, add=T)

## From proposition 1 in Rosenbaum and Silber (DOI: 10.1198/jasa.2009.tm08470), and setting Delta = Lambda = x, the amplified sensitivity parameters Delta and Lambda emerge as the root of the quadtratic equation x^2 - 2*gamma*x + 1 = 0.
(-(-2*gamma)+sqrt((-2*gamma)^2 - 4))/2 # For gamma = 2.65, for instance, Delta = Lambda = 5.104077, which means that the unobserved variable, u, would need to be associated with a five-fold increase in the odds of treatment AND a five-fold increase in the odds of greater patenting response. In other words, u would predict treatment and outcome accurately 5.104077 out of 6.104077, or roughly 83 times out of 100, as we put it in the article.
(-(-2*gamma)-sqrt((-2*gamma)^2 - 4))/2 # The second root is the inverse of the first, e.g. 0.1959218 = 1 / 5.104077.

# Compare matched with unmatched EU ETS firms
## Low-carbon patenting
### Equivalence range (±0.2 standard deviations of the pooled sample, pre-treatment)
0.2*sd(ETSpatent_counts$green_pat0004)

### Critical equivalence range (5% significance level, using Wilcoxon's rank-sum test)
mu = 1.99
a <- pmax(ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==1] - mu, 0)
b <- ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0]
c <- ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==1]
d <- pmax(ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0] - mu, 0)
t <- factor(c(rep("treated", length(a)), rep("control", length(b))))
wilcox_test(append(a,b) ~ t, alternative = "less", distribution = c("asymptotic")) # The function wilcox_test from coin appears to exhibit some strange behaviour for this test, so we mirror the second threshold as an approximation.
wilcox_test(append(c,d) ~ t, alternative = "greater", distribution = c("asymptotic"))

## Other patenting
### Equivalence range (±0.2 standard deviations of the pooled sample, pre-treatment)
0.2*sd(ETSpatent_counts$total_pat0004 - ETSpatent_counts$green_pat0004)

### Critical equivalence range (5% significance level, using Wilcoxon's rank-sum test)
mu = 1.99
a <- pmax(ETSpatent_counts$total_pat0004[ETSpatent_counts$matched==1] - ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==1] - mu, 0)
b <- ETSpatent_counts$total_pat0004[ETSpatent_counts$matched==0] - ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0]
c <- ETSpatent_counts$total_pat0004[ETSpatent_counts$matched==1] - ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==1]
d <- pmax(ETSpatent_counts$total_pat0004[ETSpatent_counts$matched==0] - ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0] - mu, 0)
t <- factor(c(rep("treated", length(a)), rep("control", length(b))))
wilcox_test(append(a,b) ~ t, alternative = "less", distribution = c("asymptotic")) # The function wilcox_test from coin appears to exhibit some strange behaviour for this test, so we mirror the second threshold as an approximation.
wilcox_test(append(c,d) ~ t, alternative = "greater", distribution = c("asymptotic"))

# Re-estimate treatment effects for data matched using revenues 2000-2005 (specification v2)
## For low-carbon patents
a <- ts_w2$green_patent_count
b <- cs_w2$green_patent_count
c <- ts_w2$epo_green_pat_epo0507 + ts_w2$epo_green_pat_epo0809
d <- cs_w2$epo_green_pat_epo0507 + cs_w2$epo_green_pat_epo0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=8, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 2.75
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 1
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 5.99

## For other patents
a <- ts_w2$patent_count - ts_w2$green_patent_count 
b <- cs_w2$patent_count - cs_w2$green_patent_count
c <- ts_w2$epo_total_pat0507 + ts_w2$epo_total_pat0809 - ts_w2$epo_green_pat_epo0507 - ts_w2$epo_green_pat_epo0809
d <- cs_w2$epo_total_pat0507 + cs_w2$epo_total_pat0809 - cs_w2$epo_green_pat_epo0507 - cs_w2$epo_green_pat_epo0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=5, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 1
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 1
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 1.99

# Estimate upper bound
## For low-carbon patents
a <- append(ts_w1$green_patent_count, ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0])
b <- append(cs_w1$green_patent_count, ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0])
c <- append(ts_w1$epo_green_pat_epo0507 + ts_w1$epo_green_pat_epo0809, ETSpatent_counts$green_pat0509[ETSpatent_counts$matched==0])
d <- append(cs_w1$epo_green_pat_epo0507 + cs_w1$epo_green_pat_epo0809, rep.int(0, length(ETSpatent_counts$green_pat0509[ETSpatent_counts$matched==0])))
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=45, step = 0.01, distribution = "exact")
pe <- mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 13
lci <- min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 3.01
uci <- max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 39.99

## Calculate aggregate number of patents added for matched EU ETS firms
sum(c) - sum(pmax(c - pe, 0)) # Point estimate = 500
sum(c) - sum(pmax(c - lci, 0)) # Lower bound of CI = 230.37
sum(c) - sum(pmax(c - uci, 0)) # Upper bound of CI = 866.9

## Calculate aggregate change (in percent) for matched EU ETS firms
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (sum(pmax(c - pe, 0))) # Point estimate = 29.60332
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (sum(pmax(c - lci, 0))) # Lower bound of CI = 11.76179
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (sum(pmax(c - uci, 0))) # Upper bound of CI = 65.56993

## Calculate aggregate change (in percent) for EPO
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (lc_epo - (sum(c) - (sum(pmax(c - pe, 0))))) # Point estimate =  2.292631
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (lc_epo - (sum(c) - (sum(pmax(c - lci, 0))))) # Lower bound of CI = 1.043407
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (lc_epo - (sum(c) - (sum(pmax(c - uci, 0))))) # Upper bound of CI = 4.042981

# For other patents
a <- append(ts_w1$patent_count - ts_w1$green_patent_count , ETSpatent_counts$total_pat0004[ETSpatent_counts$matched==0] - ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0])
b <- append(cs_w1$patent_count - cs_w1$green_patent_count, ETSpatent_counts$total_pat0004[ETSpatent_counts$matched==0] - ETSpatent_counts$green_pat0004[ETSpatent_counts$matched==0])
c <- append(ts_w1$epo_total_pat0507 + ts_w1$epo_total_pat0809 - ts_w1$epo_green_pat_epo0507 - ts_w1$epo_green_pat_epo0809, ETSpatent_counts$total_pat0509[ETSpatent_counts$matched==0] - ETSpatent_counts$green_pat0509[ETSpatent_counts$matched==0])
d <- append(cs_w1$epo_total_pat0507 + cs_w1$epo_total_pat0809 - cs_w1$epo_green_pat_epo0507 - cs_w1$epo_green_pat_epo0809, rep.int(0, length(ETSpatent_counts$total_pat0509[ETSpatent_counts$matched==0] - ETSpatent_counts$green_pat0509[ETSpatent_counts$matched==0])))
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=11, step = 0.01, distribution = "exact")
pe <- mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 5.75
lci <- min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 4
uci <- max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 10.99

## Calculate aggregate number of patents added for matched EU ETS firms
sum(c) - sum(pmax(c - pe, 0)) # Point estimate = 2005.75
sum(c) - sum(pmax(c - lci, 0)) # Lower bound of CI = 1558
sum(c) - sum(pmax(c - uci, 0)) # Upper bound of CI = 3144.95

## Calculate aggregate change (in percent) for matched EU ETS firms
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (sum(pmax(c - pe, 0))) # Point estimate = 3.146509
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (sum(pmax(c - lci, 0))) # Lower bound of CI = 2.427056
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (sum(pmax(c - uci, 0))) # Upper bound of CI = 5.023396

## Calculate aggregate change (in percent) for EPO
100*(sum(c) - (sum(pmax(c -pe, 0)))) / (other_epo - (sum(c) - (sum(pmax(c - pe, 0))))) # Point estimate = 0.2671401
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (other_epo - (sum(c) - (sum(pmax(c - lci, 0))))) # Lower bound of CI = 0.2073819
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (other_epo - (sum(c) - (sum(pmax(c - uci, 0))))) # Upper bound of CI = 0.4195034


# Out-of-sample response necessary for a 5% increase in low-carbon patenting
## Number of low-carbon patents that would need to be additional to have caused a 5% increase
sum(descriptive$tot_epo_green_pat_Y02[descriptive$year>2004])*0.05/1.05 # = 1062.333

## Less those added by EU ETS firms (see above for calculation)
sum(descriptive$tot_epo_green_pat_Y02[descriptive$year>2004])*0.05/1.05 - 183 # = 879.3333

## So while in-sample EU ETS firms have on average filed 0.03286638 additional patents...
183/nrow(ETSpatent_counts)

## ... out-of-sample EU ETS firms would have to have filed at least 0.318138 each, on average (see article for explanation of the denomenator)...
(sum(descriptive$tot_epo_green_pat_Y02[descriptive$year>2004])*0.05/1.05 - 183) / 2764

## ... even when assuming the upper bound estimate calculated above is appropriate for in-sample EU ETS firms, out-of-sample EU ETS firms would still have to have filed at least 0.2034491 additional low-carbon patents on average, while in-sample firms would only have filed an additional 0.08979885 low-carbon patents.
(sum(descriptive$tot_epo_green_pat_Y02[descriptive$year>2004])*0.05/1.05 - 500) / 2764
500/nrow(ETSpatent_counts)

# Table 4, Web appendix: Treatment effect estimates using ‘distant’ matches
## EU ETS firms matched to non-EU ETS firms in BG, CH, NO, and RO
a <- ts_w4$green_patent_count
b <- cs_w4$green_patent_count
c <- ts_w4$epo_green_pat_epo0507 + ts_w4$epo_green_pat_epo0809
d <- cs_w4$epo_green_pat_epo0507 + cs_w4$epo_green_pat_epo0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=-1, effect.max=5, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 1
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 0
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 1.99

## EU ETS firms matched to non-EU ETS firms in US
a <- ts_w5$green_patent_count
b <- cs_w5$green_patent_count
c <- ts_w5$epo_green_pat_epo0507 + ts_w5$epo_green_pat_epo0809
d <- cs_w5$epo_green_pat_epo0507 + cs_w5$epo_green_pat_epo0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=-3, effect.max=3, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = -1
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = -1.99
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 0.99

## Analysis of v3 (treatment year 2003)
## Estimate treatment effect for low-carbon patents
a <- ts_w3$epo_green_pat_epo0002
b <- cs_w3$epo_green_pat_epo0002
c <- ts_w3$epo_green_pat_epo0304
d <- cs_w3$epo_green_pat_epo0304
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=-5, effect.max=3, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = -1.75
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = -5
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 1.99

# Table 5, Web appendix: Estimates with different definitions of “low-carbon technologies”
a <- ts_w1$epo_green_pat_epoipc0002 + ts_w1$epo_green_pat_epoipc0304
b <- cs_w1$epo_green_pat_epoipc0002 + cs_w1$epo_green_pat_epoipc0304
c <- ts_w1$epo_green_pat_epoipc0507 + ts_w1$epo_green_pat_epoipc0809
d <- cs_w1$epo_green_pat_epoipc0507 + cs_w1$epo_green_pat_epoipc0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=5, step = 0.01, distribution = "exact")
pe <- mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 1.75
lci <- min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 1
uci <- max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 3.99

## Calculate aggregate change (in percent) for matched EU ETS firms
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (sum(pmax(c - pe, 0))) # Point estimate = 32.38938
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (sum(pmax(c - lci, 0))) # Lower bound of CI = 20.25723
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (sum(pmax(c - uci, 0))) # Upper bound of CI = 62.47448

## Calculate aggregate change (in percent) for EPO
lc_epoipc <- sum(descriptive$tot_epo_green_pat_Y02plus[descriptive$year>2004]) # 26733 low-carbon patents filed at the EPO in 2005-2009
100*(sum(c) - (sum(pmax(c - pe, 0)))) / (lc_epoipc - (sum(c) - (sum(pmax(c - pe, 0)))))  # Point estimate = 0.3434491
100*(sum(c) - (sum(pmax(c - lci, 0)))) / (lc_epoipc - (sum(c) - (sum(pmax(c - lci, 0)))))  # Lower bound of CI = 0.2362205
100*(sum(c) - (sum(pmax(c - uci, 0)))) / (lc_epoipc - (sum(c) - (sum(pmax(c - uci, 0)))))  # Upper bound of CI = 0.5408589

# Table 6, Web appendix: Changes in quality of low-carbon patents
## Citations
a <- ts_w1$epo_green_pat_epo_cit0002 + ts_w1$epo_green_pat_epo_cit0304
b <- cs_w1$epo_green_pat_epo_cit0002 + cs_w1$epo_green_pat_epo_cit0304
c <- ts_w1$epo_green_pat_epo_cit0507 + ts_w1$epo_green_pat_epo_cit0809
d <- cs_w1$epo_green_pat_epo_cit0507 + cs_w1$epo_green_pat_epo_cit0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=20, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 2.75
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 0
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 17.99 (in this case, the p-value never drops below 0.05 above the point estimate, but drops to p=0.05531311 above 17.99, so this is taken as the upper bound of the confidence interval here.)

## Family size
a <- ts_w1$epo_green_pat_epo_fam0002 + ts_w1$epo_green_pat_epo_fam0304
b <- cs_w1$epo_green_pat_epo_fam0002 + cs_w1$epo_green_pat_epo_fam0304
c <- ts_w1$epo_green_pat_epo_fam0507 + ts_w1$epo_green_pat_epo_fam0809
d <- cs_w1$epo_green_pat_epo_fam0507 + cs_w1$epo_green_pat_epo_fam0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=0, effect.max=40, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 11.5
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 4
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 35

# Estimate treatment effect on “pollution control technologies”
a <- ts_w1$epo_pct_pat0002 + ts_w1$epo_pct_pat0304
b <- cs_w1$epo_pct_pat0002 + cs_w1$epo_pct_pat0304
c <- ts_w1$epo_pct_pat0507 + ts_w1$epo_pct_pat0809
d <- cs_w1$epo_pct_pat0507 + cs_w1$epo_pct_pat0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=-15, effect.max=5, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 0.75
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = 12 (p-value drops to a minimum of 0.08760923 at 12.01)
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 4.99


############### Replication code for section 5 ###############
# "Secondly, EU ETS firms have filed over 120,000 patents with the EPO since 2000..."
sum(descriptive$total_patents_ets[descriptive$year>1999]) # = 120115
sum(ETSpatent_counts$total_pat0004 + ETSpatent_counts$total_pat0509) # or equivalently

# "circa 2.5% of which protect low-carbon technologies."
100*sum(descriptive$greenpat_epo_ets[descriptive$year>1999]) / sum(descriptive$total_patents_ets[descriptive$year>1999]) # = 2.631645
100*sum(ETSpatent_counts$green_pat0004 + ETSpatent_counts$green_pat0509) / sum(ETSpatent_counts$total_pat0004 + ETSpatent_counts$total_pat0509) # or equivalently

# Figure 7: Comparison of matched co-patenters and non-co-patenting firms
postscript(file = "balanceplots_copatent.eps", width = 8.4, height = 3.3)
par(mfrow=c(nrows=1,ncols=3), oma = c(0,0,0,0))
# qqplot(log(cs_w6$revenue0104), log(ts_w6$revenue0104),
# xlim=c(min(min(log(cs_w6$revenue0104)),min(log(ts_w6$revenue0104))),max(max(log(cs_w6$revenue0104)),max(log(ts_w6$revenue0104)))), ylim=c(min(min(log(cs_w6$revenue0104)),min(log(ts_w6$revenue0104))),max(max(log(cs_w6$revenue0104)),max(log(ts_w6$revenue0104)))), xlab = "Turnover of non-copatenters (Mil. Euro)", ylab = "Turnover of EU ETS co-patenters (Mil. Euro)", main = "(a)", xaxt = "n", yaxt = "n"
# )
# axis(side = 1, at = c(log(1),log(100),log(10000),log(1000000),log(100000000)), labels = c("0","0.1","10","1'000","100'000"))
# axis(side = 2, at = c(log(1),log(100),log(10000),log(1000000),log(100000000)), labels = c("0","0.1","10","1'000","100'000"))
# abline(coef=c(0,1), col=2)
qqplot(log(cs_w6$patent_count+1), log(ts_w6$patent_count+1), xlim=c(0,max(max(log(cs_w6$patent_count+1)),max(log(ts_w6$patent_count+1)))), ylim=c(0,max(max(log(cs_w6$patent_count+1)),max(log(ts_w6$patent_count+1)))), xlab = "Patents by firms non-copatenters", ylab = "Patents by EU ETS co-patenters", main = "(b)", xaxt = "n", yaxt = "n")
axis(side = 1, at = c(log(1),log(10),log(100),log(1000),log(10000)), labels = c("0","10","100","1'000","10'000"))
axis(side = 2, at = c(log(1),log(10),log(100),log(1000),log(10000)), labels = c("0","10","100","1'000","10'000"))
abline(coef=c(0,1), col=2)
qqplot(log(cs_w6$green_patent_count+1), log(ts_w6$green_patent_count+1), xlim=c(0,max(max(log(cs_w6$green_patent_count+1)),max(log(ts_w6$green_patent_count+1)))), ylim=c(0,max(max(log(cs_w6$green_patent_count+1)),max(log(ts_w6$green_patent_count+1)))), xlab = "Low-carbon patents by non-copatenters", ylab = "Low-carbon patents by EU ETS co-patenters", main = "(c)", xaxt = "n", yaxt = "n")
axis(side = 1, at = c(log(1),log(10),log(100)), labels = c("0","10","100"))
axis(side = 2, at = c(log(1),log(10),log(100)), labels = c("0","10","100"))
abline(coef=c(0,1), col=2)
dev.off()

# Table 6: Equivalence tests for matched co-patenters and non-co-patenting firms
# ## Turnover
# median(ts_w6$revenue0104 - cs_w6$revenue0104) # Median difference
#
# ### Equivalence range (±0.2 standard deviations of the pooled sample)
# 0.2*sd(append(ts_w6$revenue0104, cs_w6$revenue0104)
#
# ### Critical equivalence range (5% significance level)
# mu = 13248
# a <- pmax(ts_w6$revenue0104 - mu, 0)
# b <- pmax(cs_w6$revenue0104 - mu, 0)
# wilcoxsign_test(a ~ cs_w6$revenue0104, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w6$revenue0104 ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
# 
# ### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
# a <- pmax(ts_w6$revenue0104 - 0.2*sd(append(ts_w6$revenue0104, cs_w6$revenue0104)), 0)
# b <- pmax(cs_w6$revenue0104 - 0.2*sd(append(ts_w6$revenue0104, cs_w6$revenue0104)), 0)
# wilcoxsign_test(a ~ cs_w6$revenue0104, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w6$revenue0104 ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ### Equivalence test using t-test for difference in means (not reported)
# t.test(ts_w6$revenue0104, cs_w6$revenue0104, alternative = c("less"), mu = 0.2*sd(append(ts_w6$revenue0104, cs_w6$revenue0104)))
# t.test(ts_w6$revenue0104, cs_w6$revenue0104, alternative = c("greater"), mu = -0.2*sd(append(ts_w6$revenue0104, cs_w6$revenue0104)))

## Patent count
median(ts_w6$patent_count - cs_w6$patent_count) # Median difference

### Equivalence range (±0.2 standard deviations of the pooled sample)
0.2*sd(append(ts_w6$patent_count, cs_w6$patent_count))

### Critical equivalence range (5% significance level)
mu = 0.01 # We are able to reject both directional hypotheses for mu=0.01.
a <- pmax(ts_w6$patent_count - mu, 0)
b <- pmax(cs_w6$patent_count - mu, 0)
wilcoxsign_test(a ~ cs_w6$patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
wilcoxsign_test(ts_w6$patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))

### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
a <- pmax(ts_w6$patent_count - 0.2*sd(append(ts_w6$patent_count, cs_w6$patent_count)), 0)
b <- pmax(cs_w6$patent_count - 0.2*sd(append(ts_w6$patent_count, cs_w6$patent_count)), 0)
wilcoxsign_test(a ~ cs_w6$patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
wilcoxsign_test(ts_w6$patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))

### Equivalence test using t-test for difference in means (not reported)
t.test(ts_w6$patent_count, cs_w6$patent_count, alternative = c("less"), mu = 0.2*sd(append(ts_w6$patent_count, cs_w6$patent_count)))
t.test(ts_w6$patent_count, cs_w6$patent_count, alternative = c("greater"), mu = -0.2*sd(append(ts_w6$patent_count, cs_w6$patent_count)))

## Low-carbon patent count
median(ts_w6$green_patent_count - cs_w6$green_patent_count) # Median difference

### Equivalence range (±0.2 standard deviations of the pooled sample)
0.2*sd(append(ts_w6$green_patent_count, cs_w6$green_patent_count))

### Critical equivalence range (5% significance level)
mu = 0.99
a <- pmax(ts_w6$green_patent_count - mu, 0)
b <- pmax(cs_w6$green_patent_count - mu, 0)
wilcoxsign_test(a ~ cs_w6$green_patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w6$green_patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
a <- pmax(ts_w6$green_patent_count - 0.2*sd(append(ts_w6$green_patent_count, cs_w6$green_patent_count)), 0)
b <- pmax(cs_w6$green_patent_count - 0.2*sd(append(ts_w6$green_patent_count, cs_w6$green_patent_count)), 0)
wilcoxsign_test(a ~ cs_w6$green_patent_count, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w6$green_patent_count ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

wilcoxsign_test(ts_w6$green_patent_count ~ cs_w6$green_patent_count, zero.method = c("Pratt"), distribution = c("exact"))

### Equivalence test using t-test for difference in means (not reported)
t.test(ts_w6$green_patent_count, cs_w6$green_patent_count, alternative = c("less"), mu = 0.2*sd(append(ts_w6$green_patent_count, cs_w6$green_patent_count)))
t.test(ts_w6$green_patent_count, cs_w6$green_patent_count, alternative = c("greater"), mu = -0.2*sd(append(ts_w6$green_patent_count, cs_w6$green_patent_count)))

# ## Year of incorporation
# median(ts_w6$yearofincorporation - cs_w6$yearofincorporation, na.rm=T) # Median difference
#
# ### Equivalence range (±0.2 standard deviations of the pooled sample)
# 0.2*sd(append(ts_w6$yearofincorporation, cs_w6$yearofincorporation), na.rm=T)
#
# ### Critical equivalence range (5% significance level)
# mu = 0.49
# a <- pmax(ts_w6$yearofincorporation - mu, 0)
# b <- pmax(cs_w6$yearofincorporation - mu, 0)
# wilcoxsign_test(a ~ cs_w6$yearofincorporation, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w6$yearofincorporation ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations (for completeness)
# a <- pmax(ts_w6$yearofincorporation - 0.2*sd(append(ts_w6$yearofincorporation, cs_w6$yearofincorporation), na.rm=T), 0)
# b <- pmax(cs_w6$yearofincorporation - 0.2*sd(append(ts_w6$yearofincorporation, cs_w6$yearofincorporation), na.rm=T), 0)
# wilcoxsign_test(a ~ cs_w6$yearofincorporation, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
# wilcoxsign_test(ts_w6$yearofincorporation ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))
#
# ### Equivalence test using t-test for difference in means (not reported)
# t.test(ts_w6$yearofincorporation, cs_w6$yearofincorporation, alternative = c("less"), mu = 0.2*sd(append(ts_w6$yearofincorporation, cs_w6$yearofincorporation), na.rm=T))
# t.test(ts_w6$yearofincorporation, cs_w6$yearofincorporation, alternative = c("greater"), mu = -0.2*sd(append(ts_w6$yearofincorporation, cs_w6$yearofincorporation), na.rm=T))

# Estimate treatment effect for low-carbon patents
a <- ts_w6$epo_green_pat_epo0002 + ts_w6$epo_green_pat_epo0304
b <- cs_w6$epo_green_pat_epo0002 + cs_w6$epo_green_pat_epo0304
c <- ts_w6$epo_green_pat_epo0507 + ts_w6$epo_green_pat_epo0809
d <- cs_w6$epo_green_pat_epo0507 + cs_w6$epo_green_pat_epo0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=-5, effect.max=5, step = 0.01, distribution = "exact")
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = 0.99
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = -0.99
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = 1.99

# Estimate treatment effect for other patents
a <- ts_w6$epo_total_pat0002 + ts_w6$epo_total_pat0304 - ts_w6$epo_green_pat_epo0002 - ts_w6$epo_green_pat_epo0304
b <- cs_w6$epo_total_pat0002 + cs_w6$epo_total_pat0304 - cs_w6$epo_green_pat_epo0002 - cs_w6$epo_green_pat_epo0304
c <- ts_w6$epo_total_pat0507 + ts_w6$epo_total_pat0809 - ts_w6$epo_green_pat_epo0507 - ts_w6$epo_green_pat_epo0809
d <- cs_w6$epo_total_pat0507 + cs_w6$epo_total_pat0809 - cs_w6$epo_green_pat_epo0507 - cs_w6$epo_green_pat_epo0809
p.values <- tobit.wilcox(pre.trt=a, post.trt=c, pre.ctrl=b, post.ctrl=d, effect.min=-2, effect.max=1, step = 0.01, distribution = "asympt") # The set is too large to compute exact p-values, so we use an asymptotic approximation.
mean(p.values[p.values[,2]==max(p.values[,2]), 1]) # Point estimate = -0.745
min(p.values[p.values[,2]>=0.05, 1]) # Lower bound of CI = −0.99
max(p.values[p.values[,2]>=0.05, 1]) # Upper bound of CI = −0.01


############### Replication code for appendix D ###############
# NB: This appendix was added at the suggestion of a reviewer. To do the required calculations here, we must unfortunately introduce a minor inconsistency in how patents are counted. In the data set "ETSpatents", used here, the unit of observation is a patent. When a patent is has been assigned to multiple technology classes, there is one row for each assignment, with the application ID number repeated. This means that there are more rows than patents. Moreover, because some patents are held jointly by multiple firms, there will be fewer application ID numbers than patents, following the earlier convention used throughout of adding up the patent counts across firms. The former effect tends to dominate, because multiple technology class assignment is more common than jointly held patents.
# NB: Both BvD ID numbers and NACE codes have been removed from "ETSpatents", for licensing reasons. The reader can still examine the data and identify each of the patents held by EU ETS firms, but cannot identify the firms. This prevents complete replication.
#
# # Table 2, Web appendix: Additional low-carbon patents by technology (conditional estimates)
# tech <- c("Y02E60:5", "Y02E20:1", "Y02E10:7", "Y02E60:1", "Y02E10:4", "Y02E10:5", "Y02E20:3", "Y02E50:1", "Y02E60:3", "Y02B70:1", "Y02C20:1", "Y02E50:3", "Y02C10:0", "Y02E30:3", "Y02E30:4", "Y02E40:6", "Y02E10:3", "Y02T90:3", "Y02B30:1", "Y02C10:1", "Y02E40:1", "Y02E40:4", "Y02E70:3", "Y02E10:2", "Y02B30:6", "Y02C20:2", "Y02E10:1", "Y04S10:1", "Y02B30:7", "Y02T10:9", "Y02E40:3", "Y02E40:7", "Y02B90:1", "Y02E70:1", "Y02E40:2", "Y02E40:5", "Y02E30:1", "Y02C20:3", "Y02B40:9", "Y02E10:6", "Y04S40:1", "Y04S30:1", "Y04S20:2") # Define technology categories
# output <- data.frame()
# for (i in seq_along(tech)) {
# 	x <- data.table(ETSpatents)
# 	# Assign each patent to only one technology category, always choosing the specified technology when applicable.
# 	x$techx[x$Ycode==tech[i]] <- 1
# 	x$techx[x$Ycode!=tech[i]] <- 0
# 	setkey(x, bvdid, appln_id)
# 	x1 <- x[, list(techx=max(techx)), by=list(bvdid, appln_id)]
# 	# Calculate the number of qualifying and non-qualifying low-carbon patents for each firm
# 	x1$not_techx  <- 1 - x1$techx
# 	setkey(x1, bvdid)
# 	x <- x1[, list(techx=sum(techx), not_techx=sum(not_techx)), by=list(bvdid)]
# 	x <- data.frame(x)
# 	rm(x1)
# 	# Calculate firm-level minimum
# 	x$min_techx <- 0
# 	x$min_techx[x$techx==1 & x$not_techx<2] <- 1
# 	x$min_techx[x$techx>1 & x$not_techx==1] <- 1
# 	x$min_techx[x$techx>1 & x$not_techx==0] <- 2
# 	# Calculate firm-level average
# 	x$avg_techx <- 0
# 	if (length(x$avg_techx[x$techx>0 & x$not_techx==0])>0) { x$avg_techx[x$techx>0 & x$not_techx==0] <- pmin(x$techx[x$techx>0 & x$not_techx==0], 2) }
# 	x$avg_techx[x$techx==1 & x$not_techx>0] <- x$not_techx[x$techx==1 & x$not_techx>0] / choose(x$techx[x$techx==1 & x$not_techx>0] + x$not_techx[x$techx==1 & x$not_techx>0], 2)
# 	x$avg_techx[x$techx>1 & x$not_techx>0] <- ((x$not_techx[x$techx>1 & x$not_techx>0] * x$techx[x$techx>1 & x$not_techx>0]) / choose(x$techx[x$techx>1 & x$not_techx>0] + x$not_techx[x$techx>1 & x$not_techx>0], 2)) + 2*(choose(x$techx[x$techx>1 & x$not_techx>0], 2) / choose(x$techx[x$techx>1 & x$not_techx>0] + x$not_techx[x$techx>1 & x$not_techx>0], 2))
# 	# Calculate firm-level maximum
# 	x$max_techx <- pmin(x$techx, 2)
# 	# Save results to output table
# 	output <- rbind(output, cbind(tech[i], sum(x$techx), sum(x$min_techx), round(sum(x$avg_techx),2), sum(x$max_techx)))
# }
# rm(i, x)
# colnames(output) <- c("Technology", "Total number", "Min additional", "Exp additional", "Max additional")
# ## NB: The expected number of additional patents summed across technologies slightly exceeds the original estimate because patents with multiple technology designations are always assigned to the specified technology for the purpose of calculation, which results in some double counting.
# # sum(as.numeric(as.vector(output[,4])))
#
# # Table 3, Web appendix: Additional low-carbon patents by sector (conditional estimates)
# sector <- c("2059", "2651", "2910", "2332", "7211", "2013", "2824", "2110", "6420", "2611", "2012", "3030", "2511", "7210", "1712", "2790", "2550", "3511", "2016", "2530", "2311", "3522", "2014", "4690", "4910", "3530", "7112", "2319", "2434", "1920", "2712", "2314", "2811", "1081", "3513", "2015", "2410", "2740", "2930", "1621", "0610", "2351", "3500", "2815", "2812", "2313", "1089", "1722", "2670", "2830", "2432", "3020", "3510") # Define sectors
# ETSpatents <- subset(ETSpatents, nace!="")
# output_nace <- data.frame()
# for (i in seq_along(sector)) {
# 	x <- data.table(ETSpatents)
# 	# Assign each patent to only one economic sector, always choosing the specified sector when applicable.
# 	x$nacex[x$nace==sector[i]] <- 1
# 	x$nacex[x$nace!=sector[i]] <- 0
# 	setkey(x, bvdid, appln_id)
# 	x1 <- x[, list(nacex=max(nacex)), by=list(bvdid, appln_id)]
# 	# Calculate the number of qualifying and non-qualifying low-carbon patents for each firm
# 	x1$not_nacex  <- 1 - x1$nacex
# 	setkey(x1, bvdid)
# 	x <- x1[, list(nace=sum(nacex), not_nace=sum(not_nacex)), by=list(bvdid)]
# 	x <- data.frame(x)
# 	rm(x1)
# 	# Calculate the number of additional qualifying low-carbon patents for each firm
# 	x$add_nace <- pmin(x$nace, 2)
# 	# Save results to output table
# 	output_nace <- rbind(output_nace, cbind(sector[i], sum(x$nace), sum(x$add_nace)))
# }
# rm(i, x)
# colnames(output_nace) <- c("Sector", "Total number", "Additional")
# ## Check: The total number of additional patents summed across sectors is slightly lower than the original estimate because patents attributed to firms without a designated NACE code are dropped from this calculation.
# # sum(as.numeric(as.vector(output_nace[,3])))


############### Replication code for appendix E ###############
# Table 7, Web appendix: Equivalence tests for matched EU ETS and non-EU ETS firms on omitted variables
## Number of innovation locations 2000-2004
median(ts_w1$inv_loc - cs_w1$inv_loc) # Median difference = 0

### Equivalence range
0.2*sd(append(ts_w1$inv_loc, cs_w1$inv_loc)) # = 11.81586

### Critical equivalence range (5% significance level)
mu = 5
a <- pmax(ts_w1$inv_loc - mu, 0)
b <- pmax(cs_w1$inv_loc - mu, 0)
wilcoxsign_test(a ~ cs_w1$inv_loc, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w1$inv_loc ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations
a <- pmax(ts_w1$inv_loc - 0.2*sd(append(ts_w1$inv_loc, cs_w1$inv_loc)), 0)
b <- pmax(cs_w1$inv_loc - 0.2*sd(append(ts_w1$inv_loc, cs_w1$inv_loc)), 0)
wilcoxsign_test(a ~ cs_w1$inv_loc, alternative = "less", zero.method = c("Pratt"), distribution = c("exact"))
wilcoxsign_test(ts_w1$inv_loc ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("exact"))

## Firm growth
median(ts_w1$avg_growth - cs_w1$avg_growth, na.rm=T) # Median difference = -0.009969071

### Equivalence range
0.2*sd(append(ts_w1$avg_growth, cs_w1$avg_growth), na.rm=T)

### Critical equivalence range (5% significance level)
mu = 0.0191
a <- ts_w1$avg_growth - mu
b <- cs_w1$avg_growth - mu
wilcoxsign_test(a ~ cs_w1$avg_growth, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
wilcoxsign_test(ts_w1$avg_growth ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))

### Equivalence test using Wilcoxon test with Tobit-modified treatment effect for an equivalence range of 0.2 standard deviations
a <- ts_w1$avg_growth - 0.2*sd(append(ts_w1$avg_growth, cs_w1$avg_growth), na.rm=T)
b <- cs_w1$avg_growth - 0.2*sd(append(ts_w1$avg_growth, cs_w1$avg_growth), na.rm=T)
wilcoxsign_test(a ~ cs_w1$avg_growth, alternative = "less", zero.method = c("Pratt"), distribution = c("asympt"))
wilcoxsign_test(ts_w1$avg_growth ~ b, alternative = "greater", zero.method = c("Pratt"), distribution = c("asympt"))